interactive ggplot graphs in R with ggiraph
https://www.infoworld.com/article/3626911/easy-interactive-ggplot-graphs-in-r-with-ggiraph.html
“Use a ggiraph interactive geom instead of a “regular” ggplot geom.
The format is easy to remember: Just add _interactive to your usual geom.
So, geom_col() for a regular bar chart would be geom_col_interactive(), geom_point() would be geom_point_interactive(), and so on.
Add at least one interactive argument to the graph’s aes() mapping: tooltip, data_id, or onclick.
That data_id argument is what connects two graphics, letting you hover over one and affect the display of another one — all without Shiny.”
“After creating your ggiraph dataviz object, use the girafe() function to turn it into a JavaScript graphic.
Yes, that’s girafe() like the animal but with one f - the creator of ggiraph, David Gohel, lives in Paris.”
library(dplyr)
library(ggplot2)
library(ggiraph)
library(patchwork)
library(lubridate)
library(scales) # For the percent_format() function in ggplot axis
# for Canada map
library(sf) # for st_read
https://health-infobase.canada.ca/covid-19/epidemiological-summary-covid-19-cases.html
https://resources-covid19canada.hub.arcgis.com/datasets/provincial-daily-totals/explore
# Local CSV files: the provincial daily totals and the population table.
data_url_can <- "Provincial_Daily_Totals.csv"
data_url_can_pop <- "1710000901-eng-Canada-Census-2021-pop.csv"
# Load the daily totals with read.csv() defaults.
all_data_can <- read.csv(file = data_url_can)
The population file starts with metadata lines that must be removed; the call below skips the first 10 lines.
# Read the population table, skipping the first 10 lines of the file. With
# header = FALSE the columns arrive with default names and are renamed below.
pop_data_can <- read.csv(data_url_can_pop, skip = 10, header = FALSE)
names(pop_data_can)[1:2] <- c("Province", "Population")
# Upper-case the names so they can be matched against the upper-case Province
# values in the daily totals.
pop_data_can$Province <- toupper(pop_data_can$Province)
pop_data_can$Province
## [1] "CANADA"
## [2] "NEWFOUNDLAND AND LABRADOR"
## [3] "PRINCE EDWARD ISLAND"
## [4] "NOVA SCOTIA"
## [5] "NEW BRUNSWICK"
## [6] "QUEBEC"
## [7] "ONTARIO"
## [8] "MANITOBA"
## [9] "SASKATCHEWAN"
## [10] "ALBERTA"
## [11] "BRITISH COLUMBIA"
## [12] "YUKON"
## [13] "NORTHWEST TERRITORIES 5"
## [14] "NUNAVUT 5"
## [15] "FOOTNOTES:"
## [16] "1"
## [17] "2"
## [18] "3"
## [19] "4"
## [20] "5"
## [21] "HOW TO CITE: STATISTICS CANADA. TABLE 17-10-0009-01 POPULATION ESTIMATES, QUARTERLY"
## [22] "HTTPS://WWW150.STATCAN.GC.CA/T1/TBL1/EN/TV.ACTION?PID=1710000901"
# Strip the footnote markers and shorten two names to the spellings used in
# the daily totals. Newfoundland and Labrador and British Columbia keep their
# full names (the "NL" / "BC" abbreviations are deliberately not applied).
pop_data_can$Province <- dplyr::recode(
  pop_data_can$Province,
  "NUNAVUT 5" = "NUNAVUT",
  "PRINCE EDWARD ISLAND" = "PEI",
  "NORTHWEST TERRITORIES 5" = "NWT"
)
# Rows 15-22 hold the footnotes and the citation text, not provinces.
pop_data_can <- pop_data_can[-(15:22), ]
pop_data_can$Province
## [1] "CANADA" "NEWFOUNDLAND AND LABRADOR"
## [3] "PEI" "NOVA SCOTIA"
## [5] "NEW BRUNSWICK" "QUEBEC"
## [7] "ONTARIO" "MANITOBA"
## [9] "SASKATCHEWAN" "ALBERTA"
## [11] "BRITISH COLUMBIA" "YUKON"
## [13] "NWT" "NUNAVUT"
# Preview the first rows of the provincial daily totals.
head(all_data_can)
## OBJECTID Province Abbreviation DailyTotals SummaryDate
## 1 1 ALBERTA AB 0 2020/01/25 12:00:00+00
## 2 2 NWT NT 0 2020/01/25 12:00:00+00
## 3 3 YUKON YT 0 2020/01/25 12:00:00+00
## 4 4 SASKATCHEWAN SK 0 2020/01/25 12:00:00+00
## 5 5 PEI PE 0 2020/01/25 12:00:00+00
## 6 6 ONTARIO ON 1 2020/01/25 12:00:00+00
## TotalCases TotalRecovered DailyRecovered TotalDeaths DailyDeaths TotalTested
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 1 0 0 0 0 0
## DailyTested TotalActive DailyActive TotalHospitalized DailyHospitalized
## 1 0 0 0 NA NA
## 2 0 0 0 NA NA
## 3 0 0 0 NA NA
## 4 0 0 0 NA NA
## 5 0 0 0 NA NA
## 6 0 1 1 NA NA
## TotalICU DailyICU TotalVaccinated DailyVaccinated TotalDose1 DailyDose1
## 1 NA NA NA NA NA NA
## 2 NA NA NA NA NA NA
## 3 NA NA NA NA NA NA
## 4 NA NA NA NA NA NA
## 5 NA NA NA NA NA NA
## 6 NA NA NA NA NA NA
## TotalDose2 DailyDose2 TotalBooster DailyBooster
## 1 NA NA NA NA
## 2 NA NA NA NA
## 3 NA NA NA NA
## 4 NA NA NA NA
## 5 NA NA NA NA
## 6 NA NA NA NA
# List every column of the daily totals with its type.
str(all_data_can)
## 'data.frame': 11685 obs. of 26 variables:
## $ OBJECTID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Province : chr "ALBERTA" "NWT" "YUKON" "SASKATCHEWAN" ...
## $ Abbreviation : chr "AB" "NT" "YT" "SK" ...
## $ DailyTotals : int 0 0 0 0 0 1 0 0 0 0 ...
## $ SummaryDate : chr "2020/01/25 12:00:00+00" "2020/01/25 12:00:00+00" "2020/01/25 12:00:00+00" "2020/01/25 12:00:00+00" ...
## $ TotalCases : int 0 0 0 0 0 1 0 0 0 0 ...
## $ TotalRecovered : int 0 0 0 0 0 0 0 0 0 0 ...
## $ DailyRecovered : int 0 0 0 0 0 0 0 0 0 0 ...
## $ TotalDeaths : int 0 0 0 0 0 0 0 0 0 0 ...
## $ DailyDeaths : int 0 0 0 0 0 0 0 0 0 0 ...
## $ TotalTested : int 0 0 0 0 0 0 0 0 0 0 ...
## $ DailyTested : int 0 0 0 0 0 0 0 0 0 0 ...
## $ TotalActive : int 0 0 0 0 0 1 0 0 0 0 ...
## $ DailyActive : int 0 0 0 0 0 1 0 0 0 0 ...
## $ TotalHospitalized: int NA NA NA NA NA NA NA NA NA NA ...
## $ DailyHospitalized: int NA NA NA NA NA NA NA NA NA NA ...
## $ TotalICU : int NA NA NA NA NA NA NA NA NA NA ...
## $ DailyICU : int NA NA NA NA NA NA NA NA NA NA ...
## $ TotalVaccinated : int NA NA NA NA NA NA NA NA NA NA ...
## $ DailyVaccinated : int NA NA NA NA NA NA NA NA NA NA ...
## $ TotalDose1 : int NA NA NA NA NA NA NA NA NA NA ...
## $ DailyDose1 : int NA NA NA NA NA NA NA NA NA NA ...
## $ TotalDose2 : int NA NA NA NA NA NA NA NA NA NA ...
## $ DailyDose2 : int NA NA NA NA NA NA NA NA NA NA ...
## $ TotalBooster : int NA NA NA NA NA NA NA NA NA NA ...
## $ DailyBooster : int NA NA NA NA NA NA NA NA NA NA ...
# Confirm the population table is a plain data frame.
class(pop_data_can)
## [1] "data.frame"
# Preview the cleaned population table.
head(pop_data_can)
## Province Population
## 1 CANADA 38,436,447
## 2 NEWFOUNDLAND AND LABRADOR 521,758
## 3 PEI 165,936
## 4 NOVA SCOTIA 998,832
## 5 NEW BRUNSWICK 794,300
## 6 QUEBEC 8,631,147
# Check column types; Population is still text at this point.
str(pop_data_can)
## 'data.frame': 14 obs. of 2 variables:
## $ Province : chr "CANADA" "NEWFOUNDLAND AND LABRADOR" "PEI" "NOVA SCOTIA" ...
## $ Population: chr "38,436,447" "521,758" "165,936" "998,832" ...
# Same check using dplyr's compact column summary.
glimpse(pop_data_can)
## Rows: 14
## Columns: 2
## $ Province <chr> "CANADA", "NEWFOUNDLAND AND LABRADOR", "PEI", "NOVA SCOTIA"…
## $ Population <chr> "38,436,447", "521,758", "165,936", "998,832", "794,300", "…
# List every distinct Province spelling in the daily totals; the row names in
# the output show where each spelling first appears.
unique(all_data_can[c("Province")])
## Province
## 1 ALBERTA
## 2 NWT
## 3 YUKON
## 4 SASKATCHEWAN
## 5 PEI
## 6 ONTARIO
## 7 NEW BRUNSWICK
## 8 REPATRIATED
## 9 NOVA SCOTIA
## 10 NL
## 11 MANITOBA
## 12 BC
## 13 NUNAVUT
## 14 QUEBEC
## 1053 PRINCE EDWARD ISLAND
## 1054 NEWFOUNDLAND AND LABRADOR
## 1056 BRITISH COLUMBIA
## 1063 NORTHWEST TERRITORIES
## 1064 REPATRIATED CDN
## 1135 CANADA
# Province values that are not a province or territory (the national total and
# the repatriated entries); used to exclude those rows from the charts.
not_provinces <- c("CANADA", "REPATRIATED", "REPATRIATED CDN")
# SummaryDate is read in as text and still needs converting to a Date.
class(all_data_can$SummaryDate) # character
## [1] "character"
# Parse the SummaryDate strings (e.g. "2020/01/25 12:00:00+00") into a Date
# column; as.Date() keeps only the calendar date. The column is placed directly
# after SummaryDate by name, which gives the same layout as the earlier
# index-based data.table::setcolorder() calls without the extra dependency or
# the empty placeholder column.
all_data_can <- all_data_can %>%
  mutate(Date = as.Date(SummaryDate)) %>%
  relocate(Date, .after = SummaryDate)
class(all_data_can$Date) # date
## [1] "Date"
# Rows for the most recent date in the data, excluding non-provinces.
bar_graph_data_recent_can <- select(
  filter(all_data_can, Date == max(Date), !(Province %in% not_provinces)),
  Province, TotalVaccinated
)

# Static horizontal bar chart, provinces ordered by TotalVaccinated.
bar_graph_totvac <- ggplot(
  data = bar_graph_data_recent_can,
  mapping = aes(x = reorder(Province, TotalVaccinated), y = TotalVaccinated)
) +
  geom_col(color = "black", fill = "#6a8de1", size = 0.5) +
  scale_y_continuous(labels = comma_format()) +
  coord_flip() +
  labs(
    title = "Fully Vaccinated March 23, 2022",
    subtitle = "Data from https://resources-covid19canada.hub.arcgis.com",
    x = "",
    y = "Total Vaccinated"
  ) +
  theme_minimal() +
  theme(axis.text = element_text(size = 10))
bar_graph_totvac
To format the totals as comma-separated integers, use scales::comma(columnname, 1).
# Tooltip text: upper-case province name, a line break, then the total with
# thousands separators (scales::comma(), accuracy 1) and a " (#)" suffix.
bar_graph_data_recent_can <- mutate(
  bar_graph_data_recent_can,
  tooltip_text = paste0(
    toupper(Province), "\n", scales::comma(TotalVaccinated, 1), " (#)"
  )
)

# Interactive version of the chart: the tooltip and data_id aesthetics plus
# geom_col_interactive() are what ggiraph needs.
latest_vax_graph <- ggplot(
  data = bar_graph_data_recent_can,
  mapping = aes(
    x = reorder(Province, TotalVaccinated),
    y = TotalVaccinated,
    tooltip = tooltip_text,
    data_id = Province
  )
) +
  geom_col_interactive(color = "black", fill = "#0072B2", size = 0.5) +
  scale_y_continuous(labels = comma_format()) +
  coord_flip() +
  labs(
    title = "Fully Vaccinated March 23, 2022",
    subtitle = "Data from https://resources-covid19canada.hub.arcgis.com",
    x = "",
    y = "Total Vaccinated"
  ) +
  theme_minimal() +
  theme(axis.text = element_text(size = 6))

# Turn the ggplot object into an interactive widget.
girafe(ggobj = latest_vax_graph, width_svg = 5, height_svg = 4)
# The daily totals spell some provinces more than one way ("NL", "BC",
# "PRINCE EDWARD ISLAND", "NORTHWEST TERRITORIES" alongside the spellings used
# in pop_data_can). merge() performs an inner join, so rows carrying one of
# those spellings would be dropped silently. Map them to the population-table
# spelling on a copy before joining.
# NOTE(review): assumes no single date carries both spellings of the same
# province -- confirm against the CSV.
province_aliases <- c(
  "NL" = "NEWFOUNDLAND AND LABRADOR",
  "BC" = "BRITISH COLUMBIA",
  "PRINCE EDWARD ISLAND" = "PEI",
  "NORTHWEST TERRITORIES" = "NWT"
)
covid_by_province <- all_data_can
has_alias <- covid_by_province$Province %in% names(province_aliases)
covid_by_province$Province[has_alias] <-
  unname(province_aliases[covid_by_province$Province[has_alias]])

# Attach the population. Rows with no population entry (the REPATRIATED
# entries) are still removed by the inner join.
all_data_can_pop <- merge(covid_by_province, pop_data_can, by = "Province")

# Population goes first, and an empty PercentVaccinated placeholder goes right
# after TotalVaccinated (it is filled in once Population is numeric). Moving
# the columns by name gives the same layout as the earlier index-based
# setcolorder() calls.
all_data_can_pop$PercentVaccinated <- ""
all_data_can_pop <- all_data_can_pop %>%
  relocate(Population) %>%
  relocate(PercentVaccinated, .after = TotalVaccinated)
# The placeholder column is text until it is coerced below.
class(all_data_can_pop$PercentVaccinated) # character >> numeric
## [1] "character"
# NOTE(review): TotalVaccinated appears to add up dose 1 + dose 2 (and possibly
# later doses) rather than count people -- confirm against the data source.
class(all_data_can_pop$TotalVaccinated) # integer here; coerced to numeric below
## [1] "integer"
# Population is still text (it contains thousands separators).
class(all_data_can_pop$Population) # character
## [1] "character"
# Coerce the placeholder (empty strings become NA) and the integer totals to
# numeric before any division.
all_data_can_pop[["PercentVaccinated"]] <-
  as.numeric(all_data_can_pop[["PercentVaccinated"]])
all_data_can_pop[["TotalVaccinated"]] <-
  as.numeric(all_data_can_pop[["TotalVaccinated"]])
# Population cannot be coerced the same way: as.numeric() on the raw strings
# only produces NAs ("NAs introduced by coercion"), so that attempt stays
# disabled:
# all_data_can_pop$Population1 <- as.numeric(all_data_can_pop$Population)
class(all_data_can_pop$Population) # character >> factor >> numeric
## [1] "character"
# Population is stored as text with thousands separators, which as.numeric()
# cannot parse, so the commas are stripped first. Each cumulative count is then
# expressed as a fraction of the provincial population.
all_data_can_pop <- all_data_can_pop %>%
  mutate(
    Population = as.numeric(gsub(",", "", Population)),
    PercentVaccinated = TotalVaccinated / Population,
    PercentVaccinatedDose1 = TotalDose1 / Population,
    PercentVaccinatedDose2 = TotalDose2 / Population,
    PercentVaccinatedBooster = TotalBooster / Population
  ) %>%
  # Put each new share next to its source columns. Relocating by name gives
  # the same layout as the earlier index-based setcolorder() calls, but does
  # not break if the column count changes.
  relocate(PercentVaccinatedDose1, .after = TotalDose1) %>%
  relocate(PercentVaccinatedDose2, .after = TotalDose2) %>%
  relocate(PercentVaccinatedBooster, .after = DailyBooster)
str(all_data_can_pop)
## 'data.frame': 9348 obs. of 32 variables:
## $ Population : num 4464170 4464170 4464170 4464170 4464170 ...
## $ Province : chr "ALBERTA" "ALBERTA" "ALBERTA" "ALBERTA" ...
## $ OBJECTID : int 11928 8480 1865 1730 4280 2090 387 445 432 3170 ...
## $ Abbreviation : chr "AB" "AB" "AB" "AB" ...
## $ DailyTotals : int 0 60 19 45 406 15 0 0 0 0 ...
## $ SummaryDate : chr "2022/01/29 12:00:00+00" "2021/06/21 12:00:00+00" "2020/05/20 12:00:00+00" "2020/05/12 12:00:00+00" ...
## $ Date : Date, format: "2022-01-29" "2021-06-21" ...
## $ TotalCases : int 487436 231419 6735 6345 23402 7091 0 0 0 11430 ...
## $ TotalRecovered : int 442605 227124 5637 4866 19734 6611 0 0 0 10097 ...
## $ DailyRecovered : int 0 182 -217 207 234 24 0 0 0 0 ...
## $ TotalDeaths : int 3531 2292 128 118 296 146 0 0 0 208 ...
## $ DailyDeaths : int 0 2 0 1 3 1 0 0 0 0 ...
## $ TotalTested : int 6728210 4643107 221571 191330 1668277 275524 0 0 0 743885 ...
## $ DailyTested : int 0 2841 2607 3173 14916 4226 0 0 0 0 ...
## $ TotalActive : int 41300 2003 970 1361 3372 334 0 0 0 1125 ...
## $ DailyActive : int 0 -124 236 -163 169 -10 0 0 0 0 ...
## $ TotalHospitalized : int 1496 214 58 73 113 48 NA NA NA 68 ...
## $ DailyHospitalized : int 0 0 -3 0 -3 0 NA NA NA 0 ...
## $ TotalICU : int 105 56 7 12 16 6 NA NA NA 18 ...
## $ DailyICU : int 0 3 -1 0 0 0 NA NA NA 0 ...
## $ TotalVaccinated : num 8255417 3804700 NA NA NA ...
## $ PercentVaccinated : num 1.849 0.852 NA NA NA ...
## $ DailyVaccinated : int 0 29026 NA NA NA NA NA NA NA NA ...
## $ TotalDose1 : int 3548760 2691359 NA NA NA NA NA NA NA NA ...
## $ PercentVaccinatedDose1 : num 0.795 0.603 NA NA NA ...
## $ DailyDose1 : int 0 3454 NA NA NA NA NA NA NA NA ...
## $ TotalDose2 : int 3275269 1113341 NA NA NA NA NA NA NA NA ...
## $ PercentVaccinatedDose2 : num 0.734 0.249 NA NA NA ...
## $ DailyDose2 : int 0 25572 NA NA NA NA NA NA NA NA ...
## $ TotalBooster : int 1431208 NA NA NA NA NA NA NA NA NA ...
## $ DailyBooster : int 0 NA NA NA NA NA NA NA NA NA ...
## $ PercentVaccinatedBooster: num 0.321 NA NA NA NA ...
# Text versions of the three shares, formatted by scales::percent() with two
# decimal places; these are used in the tooltips.
all_data_can_pop <- mutate(
  all_data_can_pop,
  PercentVaccinatedDose1_percent =
    percent(PercentVaccinatedDose1, accuracy = .01),
  PercentVaccinatedDose2_percent =
    percent(PercentVaccinatedDose2, accuracy = .01),
  PercentVaccinatedBooster_percent =
    percent(PercentVaccinatedBooster, accuracy = .01)
)
# Dose 1 share per province for the most recent date in the data.
bar_graph_data_recent_can_dose1 <- all_data_can_pop %>%
  filter(Date == max(Date), !(Province %in% not_provinces)) %>%
  select(Province, PercentVaccinatedDose1, PercentVaccinatedDose1_percent)
#str(all_data_can_pop)

# Static version of the chart (built but not printed).
bar_graph_dose1 <- ggplot(bar_graph_data_recent_can_dose1,
                          aes(x = reorder(Province, PercentVaccinatedDose1),
                              y = PercentVaccinatedDose1)) +
  geom_col(color = "black", fill = "#0072B2", size = 0.5) +
  theme_minimal() +
  theme(axis.text = element_text(size = 8)) +
  labs(title = "Dose1 2022-03-23") +
  # The label used to read " Vaccinated"; the "%" matches the other charts.
  ylab("% Vaccinated") +
  xlab("") +
  # Fractions are shown as percentages on a fixed 0-100% axis.
  scale_y_continuous(labels = percent_format(), limits = c(0, 1)) +
  coord_flip()
#bar_graph_dose1

# Tooltip text: upper-case province name, a line break, the formatted share.
bar_graph_data_recent_can_dose1 <- bar_graph_data_recent_can_dose1 %>%
  mutate(
    tooltip_text = paste0(toupper(Province), "\n",
                          PercentVaccinatedDose1_percent)
  )

# Interactive version: tooltip/data_id aesthetics plus geom_col_interactive().
latest_vax_graph_dose1 <- ggplot(bar_graph_data_recent_can_dose1,
                                 aes(x = reorder(Province, PercentVaccinatedDose1),
                                     y = PercentVaccinatedDose1,
                                     tooltip = tooltip_text,
                                     data_id = Province)) +
  geom_col_interactive(color = "black", fill = "#0072B2", size = 0.5) +
  theme_minimal() +
  theme(axis.text = element_text(size = 6)) +
  labs(title = "Dose1 2022-03-23") +
  ylab("% Vaccinated") +
  xlab("") +
  scale_y_continuous(labels = percent_format(), limits = c(0, 1)) +
  # scale_y_discrete(guide = guide_axis(angle = 90)) + # if using PercentVaccinatedDose1_percent as axis
  coord_flip()
girafe(ggobj = latest_vax_graph_dose1, width_svg = 5, height_svg = 4)
# Dose 1 share per province on 2021-06-17, with the tooltip text built once.
# The earlier version also called arrange() with no columns (a no-op) and
# recomputed the identical tooltip a second time; both are removed.
bar_graph_data_early_dose1 <- all_data_can_pop %>%
  filter(Date == as.Date("2021-06-17"), !(Province %in% not_provinces)) %>%
  mutate(
    tooltip_text = paste0(toupper(Province), "\n",
                          PercentVaccinatedDose1_percent)
  ) %>%
  select(Province, PercentVaccinatedDose1, PercentVaccinatedDose1_percent,
         tooltip_text)

# Interactive horizontal bar chart for the early date.
early_vax_graph_dose1 <- ggplot(bar_graph_data_early_dose1,
                                aes(x = reorder(Province, PercentVaccinatedDose1),
                                    y = PercentVaccinatedDose1,
                                    tooltip = tooltip_text,
                                    data_id = Province)) +
  geom_col_interactive(color = "black", fill = "#0072B2", size = 0.5) +
  theme_minimal() +
  theme(axis.text = element_text(size = 6)) +
  labs(title = "Dose1 2021-06-17") +
  ylab("% Vaccinated") +
  xlab("") +
  scale_y_continuous(labels = percent_format(), limits = c(0, 1)) +
  # scale_y_discrete(guide = guide_axis(angle = 90)) +
  coord_flip()
girafe(ggobj = early_vax_graph_dose1, width_svg = 5, height_svg = 4)
# Dose 2 share per province for the most recent date in the data.
bar_graph_data_recent_can_dose2 <- select(
  filter(all_data_can_pop, Date == max(Date), !(Province %in% not_provinces)),
  Province, PercentVaccinatedDose2, PercentVaccinatedDose2_percent
)

# Static version of the chart (built but not printed).
bar_graph_dose2 <- ggplot(
  data = bar_graph_data_recent_can_dose2,
  mapping = aes(
    x = reorder(Province, PercentVaccinatedDose2),
    y = PercentVaccinatedDose2
  )
) +
  geom_col(color = "black", fill = "#0072B2", size = 0.5) +
  scale_y_continuous(labels = percent_format(), limits = c(0, 1)) +
  coord_flip() +
  labs(
    title = "Dose2 2022-03-23",
    subtitle = "Data from https://resources-covid19canada.hub.arcgis.com",
    x = "",
    y = "% Vaccinated"
  ) +
  theme_minimal() +
  theme(axis.text = element_text(size = 8))
#bar_graph_dose2

# Tooltip text: upper-case province name, a line break, the formatted share.
bar_graph_data_recent_can_dose2 <- mutate(
  bar_graph_data_recent_can_dose2,
  tooltip_text = paste0(toupper(Province), "\n", PercentVaccinatedDose2_percent)
)

# Interactive version: tooltip/data_id aesthetics plus geom_col_interactive().
latest_vax_graph_dose2 <- ggplot(
  data = bar_graph_data_recent_can_dose2,
  mapping = aes(
    x = reorder(Province, PercentVaccinatedDose2),
    y = PercentVaccinatedDose2,
    tooltip = tooltip_text,
    data_id = Province
  )
) +
  geom_col_interactive(color = "black", fill = "#0072B2", size = 0.5) +
  scale_y_continuous(labels = percent_format(), limits = c(0, 1)) +
  coord_flip() +
  labs(title = "Dose2 2022-03-23", x = "", y = "% Vaccinated") +
  theme_minimal() +
  theme(axis.text = element_text(size = 6))
girafe(ggobj = latest_vax_graph_dose2, width_svg = 5, height_svg = 4)
# Dose 2 share per province on 2021-06-17, with the tooltip text built once.
# The earlier version also called arrange() with no columns (a no-op) and
# recomputed the identical tooltip a second time; both are removed.
bar_graph_data_early_dose2 <- all_data_can_pop %>%
  filter(Date == as.Date("2021-06-17"), !(Province %in% not_provinces)) %>%
  mutate(
    tooltip_text = paste0(toupper(Province), "\n",
                          PercentVaccinatedDose2_percent)
  ) %>%
  select(Province, PercentVaccinatedDose2, PercentVaccinatedDose2_percent,
         tooltip_text)

# Interactive horizontal bar chart for the early date.
early_vax_graph_dose2 <- ggplot(bar_graph_data_early_dose2,
                                aes(x = reorder(Province, PercentVaccinatedDose2),
                                    y = PercentVaccinatedDose2,
                                    tooltip = tooltip_text,
                                    data_id = Province)) +
  geom_col_interactive(color = "black", fill = "#0072B2", size = 0.5) +
  theme_minimal() +
  theme(axis.text = element_text(size = 6)) +
  labs(title = "Dose2 2021-06-17") +
  ylab("% Vaccinated") +
  xlab("") +
  scale_y_continuous(labels = percent_format(), limits = c(0, 1)) +
  # scale_y_discrete(guide = guide_axis(angle = 90)) +
  coord_flip()
girafe(ggobj = early_vax_graph_dose2, width_svg = 5, height_svg = 4)
# Booster share per province for the most recent date in the data.
bar_graph_data_recent_can_Booster <- select(
  filter(all_data_can_pop, Date == max(Date), !(Province %in% not_provinces)),
  Province, PercentVaccinatedBooster, PercentVaccinatedBooster_percent
)
Note: Newfoundland and Labrador has an NA value here, so its bar is not plotted correctly.
https://stackoverflow.com/questions/17216358/eliminating-nas-from-a-ggplot
#str(all_data_can_pop)
# Static version of the booster chart (built but not printed).
bar_graph_Booster <- ggplot(
  data = bar_graph_data_recent_can_Booster,
  mapping = aes(
    x = reorder(Province, PercentVaccinatedBooster),
    y = PercentVaccinatedBooster
  )
) +
  geom_col(color = "black", fill = "#0072B2", size = 0.5) +
  scale_y_continuous(labels = percent_format(), limits = c(0, 1)) +
  coord_flip() +
  labs(title = "Booster 2022-03-23", x = "", y = "% Vaccinated") +
  theme_minimal() +
  theme(axis.text = element_text(size = 8))
#bar_graph_Booster

# Tooltip text: upper-case province name, a line break, the formatted share.
bar_graph_data_recent_can_Booster <- mutate(
  bar_graph_data_recent_can_Booster,
  tooltip_text = paste0(
    toupper(Province), "\n", PercentVaccinatedBooster_percent
  )
)

# Interactive version: tooltip/data_id aesthetics plus geom_col_interactive().
latest_vax_graph_Booster <- ggplot(
  data = bar_graph_data_recent_can_Booster,
  mapping = aes(
    x = reorder(Province, PercentVaccinatedBooster),
    y = PercentVaccinatedBooster,
    tooltip = tooltip_text,
    data_id = Province
  )
) +
  geom_col_interactive(color = "black", fill = "#0072B2", size = 0.5) +
  scale_y_continuous(labels = percent_format(), limits = c(0, 1)) +
  coord_flip() +
  labs(title = "Booster 2022-03-23", x = "", y = "% Vaccinated") +
  theme_minimal() +
  theme(axis.text = element_text(size = 6))
girafe(ggobj = latest_vax_graph_Booster, width_svg = 5, height_svg = 4)
## Warning: Removed 1 rows containing missing values (position_stack).
# Booster share per province on 2021-12-20, with the tooltip text built once.
# The earlier version also called arrange() with no columns (a no-op) and
# recomputed the identical tooltip a second time; both are removed.
bar_graph_data_early_Booster <- all_data_can_pop %>%
  filter(Date == as.Date("2021-12-20"), !(Province %in% not_provinces)) %>%
  mutate(
    tooltip_text = paste0(toupper(Province), "\n",
                          PercentVaccinatedBooster_percent)
  ) %>%
  select(Province, PercentVaccinatedBooster, PercentVaccinatedBooster_percent,
         tooltip_text)

# Interactive horizontal bar chart for the early date.
early_vax_graph_Booster <- ggplot(bar_graph_data_early_Booster,
                                  aes(x = reorder(Province, PercentVaccinatedBooster),
                                      y = PercentVaccinatedBooster,
                                      tooltip = tooltip_text,
                                      data_id = Province)) +
  geom_col_interactive(color = "black", fill = "#0072B2", size = 0.5) +
  theme_minimal() +
  theme(axis.text = element_text(size = 6)) +
  labs(title = "Booster 2021-12-20") +
  ylab("% Vaccinated") +
  xlab("") +
  scale_y_continuous(labels = percent_format(), limits = c(0, 1)) +
  # scale_y_discrete(guide = guide_axis(angle = 90)) +
  coord_flip()
girafe(ggobj = early_vax_graph_Booster, width_svg = 5, height_svg = 4)
# Combine the six dose charts with patchwork and render them as one widget.
# Hovered elements are filled with #e1be6a.
girafe_options(
  girafe(
    code = print(
      early_vax_graph_dose1 + latest_vax_graph_dose1 +
        early_vax_graph_dose2 + latest_vax_graph_dose2 +
        early_vax_graph_Booster + latest_vax_graph_Booster
    ),
    width_svg = 12,
    height_svg = 8
  ),
  opts_hover(css = "fill:#e1be6a;")
)
## Warning: Removed 1 rows containing missing values (position_stack).
see canmap.Rmd
https://github.com/kjhealy/canmap
# Read the Canada boundary GeoJSON (quiet = TRUE suppresses the reader's
# progress output), then print a summary of the layer.
canada_cd <- st_read(dsn = "data/canada_cd_sim.geojson", quiet = TRUE)
canada_cd
## Simple feature collection with 293 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -141.0181 ymin: 41.7297 xmax: -52.6194 ymax: 83.1355
## Geodetic CRS: NAD83
## First 10 features:
## CDUID CDNAME CDTYPE PRUID PRNAME
## 1 4609 Division No. 9 CDR 46 Manitoba
## 2 5901 East Kootenay RD 59 British Columbia / Colombie-Britannique
## 3 5933 Thompson-Nicola RD 59 British Columbia / Colombie-Britannique
## 4 4816 Division No. 16 CDR 48 Alberta
## 5 5919 Cowichan Valley RD 59 British Columbia / Colombie-Britannique
## 6 4621 Division No. 21 CDR 46 Manitoba
## 7 4608 Division No. 8 CDR 46 Manitoba
## 8 4811 Division No. 11 CDR 48 Alberta
## 9 4802 Division No. 2 CDR 48 Alberta
## 10 5951 Bulkley-Nechako RD 59 British Columbia / Colombie-Britannique
## rmapshaperid geometry
## 1 0 MULTIPOLYGON (((-97.9474 50...
## 2 1 MULTIPOLYGON (((-114.573 49...
## 3 2 MULTIPOLYGON (((-120.1425 5...
## 4 3 MULTIPOLYGON (((-110 60, -1...
## 5 4 MULTIPOLYGON (((-123.658 48...
## 6 5 MULTIPOLYGON (((-99.0172 55...
## 7 6 MULTIPOLYGON (((-98.6436 50...
## 8 7 MULTIPOLYGON (((-112.8438 5...
## 9 8 MULTIPOLYGON (((-111.3881 5...
## 10 9 MULTIPOLYGON (((-124.4407 5...
## Map theme
# Bare theme for maps: starts from theme_bw() and blanks out axes, panel
# background, border and grid, so that only the geometry, titles and legend
# remain.
#
# base_size:   base font size passed to theme_bw().
# base_family: base font family passed to theme_bw().
#
# Returns a ggplot2 theme object.
theme_map <- function(base_size = 9, base_family = "") {
  # grid::unit() is called by namespace instead of require(grid), so calling
  # the theme no longer attaches a package as a side effect.
  theme_bw(base_size = base_size, base_family = base_family) %+replace%
    theme(
      axis.line = element_blank(),
      axis.text = element_blank(),
      axis.ticks = element_blank(),
      axis.title = element_blank(),
      # Listed after axis.title, this gives the x-axis title its own setting.
      axis.title.x = element_text(size = 8),
      panel.background = element_blank(),
      panel.border = element_blank(),
      panel.grid = element_blank(),
      panel.spacing = grid::unit(0, "lines"),
      plot.background = element_blank(),
      # Anchor the legend at the bottom-left corner of the plot area.
      legend.justification = c(0, 0),
      legend.position = c(0, 0),
      plot.title = element_text(face = "bold", colour = "black", size = 10),
      plot.subtitle = element_text(face = "italic", colour = "black", size = 6,
                                   hjust = 0.5, vjust = 1),
      plot.caption = element_text(hjust = 0.1, face = "italic", size = 6,
                                  vjust = 1)
    )
}
# Make theme_map() the session default for plots that set no theme themselves.
theme_set(theme_map())
## Loading required package: grid
Transform the coordinates to a Lambert Conformal Conic Projection.
See https://www.statcan.gc.ca/pub/92-195-x/2011001/other-autre/mapproj-projcarte/m-c-eng.htm
# Reproject the boundaries using the Lambert Conformal Conic PROJ string
# below, then print the layer again to confirm the new CRS.
canada_cd <- st_transform(
  x = canada_cd,
  crs = "+proj=lcc +lat_1=49 +lat_2=77 +lon_0=-91.52 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs"
)
canada_cd
## Simple feature collection with 293 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -2529065 ymin: 5816874 xmax: 2793734 ymax: 10396190
## CRS: +proj=lcc +lat_1=49 +lat_2=77 +lon_0=-91.52 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs
## First 10 features:
## CDUID CDNAME CDTYPE PRUID PRNAME
## 1 4609 Division No. 9 CDR 46 Manitoba
## 2 5901 East Kootenay RD 59 British Columbia / Colombie-Britannique
## 3 5933 Thompson-Nicola RD 59 British Columbia / Colombie-Britannique
## 4 4816 Division No. 16 CDR 48 Alberta
## 5 5919 Cowichan Valley RD 59 British Columbia / Colombie-Britannique
## 6 4621 Division No. 21 CDR 46 Manitoba
## 7 4608 Division No. 8 CDR 46 Manitoba
## 8 4811 Division No. 11 CDR 48 Alberta
## 9 4802 Division No. 2 CDR 48 Alberta
## 10 5951 Bulkley-Nechako RD 59 British Columbia / Colombie-Britannique
## rmapshaperid geometry
## 1 0 MULTIPOLYGON (((-457449.9 6...
## 2 1 MULTIPOLYGON (((-1628202 69...
## 3 2 MULTIPOLYGON (((-1838098 74...
## 4 3 MULTIPOLYGON (((-988280.5 7...
## 5 4 MULTIPOLYGON (((-2253700 71...
## 6 5 MULTIPOLYGON (((-461578.4 7...
## 7 6 MULTIPOLYGON (((-500377.6 6...
## 8 7 MULTIPOLYGON (((-1350599 73...
## 9 8 MULTIPOLYGON (((-1372235 69...
## 10 9 MULTIPOLYGON (((-1921887 78...
Make a vector of repeated colors—just to fill in the map, for decoration only “as I don’t have any Canadian data to merge in at present”.
# Eight pastel colours repeated 37 times, so scale_fill_manual() has more
# than enough values to draw from.
map_colors <- rep(
  RColorBrewer::brewer.pal(n = 8, name = "Pastel1"),
  times = 37
)
## Draw the map
# Fill each polygon by its PRUID value; the colours are decorative only.
p <- ggplot(data = canada_cd,
            mapping = aes(fill = PRUID))
p_out <- p + geom_sf(color = "gray80",
                     size = 0.1) +
  scale_fill_manual(values = map_colors) +
  # guides(fill = FALSE) is deprecated in ggplot2; "none" hides the fill
  # legend without the deprecation warning.
  guides(fill = "none") +
  theme_map() +
  theme(panel.grid.major = element_line(color = "white"),
        legend.key = element_rect(color = "gray40", size = 0.1))
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
# Write the decorative map to a 15 x 12 PDF (ggsave's default units), then
# display it.
ggsave(filename = "figures/canada.pdf", plot = p_out, width = 15, height = 12)
p_out
# Rename the province-name column to "Province" so it lines up with the COVID
# and population tables. It is selected by name (PRNAME) rather than by
# position, so the rename still hits the right column if the layout changes.
names(canada_cd)[names(canada_cd) == "PRNAME"] <- "Province"
# Upper-case the names to match the other tables.
canada_cd$Province <- toupper(canada_cd$Province)
#canada_cd$Province
pop_data_can$Province
## [1] "CANADA" "NEWFOUNDLAND AND LABRADOR"
## [3] "PEI" "NOVA SCOTIA"
## [5] "NEW BRUNSWICK" "QUEBEC"
## [7] "ONTARIO" "MANITOBA"
## [9] "SASKATCHEWAN" "ALBERTA"
## [11] "BRITISH COLUMBIA" "YUKON"
## [13] "NWT" "NUNAVUT"
# Replace the bilingual "ENGLISH / FRENCH" province names with the spellings
# used in pop_data_can; names not listed here are left unchanged.
canada_cd$Province <- dplyr::recode(
  canada_cd$Province,
  "PRINCE EDWARD ISLAND / ÎLE-DU-PRINCE-ÉDOUARD" = "PEI",
  "NORTHWEST TERRITORIES / TERRITOIRES DU NORD-OUEST" = "NWT",
  "NEWFOUNDLAND AND LABRADOR / TERRE-NEUVE-ET-LABRADOR" = "NEWFOUNDLAND AND LABRADOR",
  "BRITISH COLUMBIA / COLOMBIE-BRITANNIQUE" = "BRITISH COLUMBIA",
  "QUEBEC / QUÉBEC" = "QUEBEC",
  "NOVA SCOTIA / NOUVELLE-ÉCOSSE" = "NOVA SCOTIA",
  "NEW BRUNSWICK / NOUVEAU-BRUNSWICK" = "NEW BRUNSWICK"
)
# canada_cd$Province
class(canada_cd)
## [1] "sf" "data.frame"
# Inspect the layer's structure after the rename and recode.
str(canada_cd)
## Classes 'sf' and 'data.frame': 293 obs. of 7 variables:
## $ CDUID : chr "4609" "5901" "5933" "4816" ...
## $ CDNAME : chr "Division No. 9" "East Kootenay" "Thompson-Nicola" "Division No. 16" ...
## $ CDTYPE : chr "CDR" "RD" "RD" "CDR" ...
## $ PRUID : chr "46" "59" "59" "48" ...
## $ Province : chr "MANITOBA" "BRITISH COLUMBIA" "BRITISH COLUMBIA" "ALBERTA" ...
## $ rmapshaperid: int 0 1 2 3 4 5 6 7 8 9 ...
## $ geometry :sfc_MULTIPOLYGON of length 293; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:59, 1:2] -457450 -458022 -458301 -457720 -457562 ...
## ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA
## ..- attr(*, "names")= chr [1:6] "CDUID" "CDNAME" "CDTYPE" "PRUID" ...
# Same decorative map as before, now with a title and subtitle.
p_can <- ggplot(data = canada_cd,
                mapping = aes(fill = PRUID))
p_can_out <- p_can + geom_sf(color = "gray80",
                             size = 0.1) +
  scale_fill_manual(values = map_colors) +
  # guides(fill = FALSE) is deprecated in ggplot2; "none" hides the fill
  # legend without the deprecation warning.
  guides(fill = "none") +
  labs(title = "Canada COVID % Vaccination 2021-2022",
       subtitle = "click for interactive map/chart (Data:resources-covid19canada.hub.arcgis.com)") +
  theme_map() +
  theme(panel.grid.major = element_line(color = "white"),
        legend.key = element_rect(color = "gray40", size = 0.1))
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
#ggsave("figures/canada.pdf", p_can_out, height = 12, width = 15)
p_can_out
# Interactive map: each polygon carries its Province as both the tooltip and
# the data_id used by ggiraph for hover highlighting.
#
# The original chain also added a bare `geom_sf(color = "gray80", size = 0.1)`.
# That layer had no data of its own and ggplot() supplies none, so it had
# nothing to draw and its colour/size settings never took effect; it is
# dropped here. geom_sf_interactive() is the only layer that renders.
prov_map <- ggplot() +
  labs(title = "Canada COVID % Vaccination 2021-2022",
       subtitle = "click for interactive map/chart (Data:resources-covid19canada.hub.arcgis.com)") +
  geom_sf_interactive(data = canada_cd, size = 0.125,
                      aes(data_id = Province, tooltip = Province)) +
  theme_map()
# Map next to the latest-vaccination bar chart; hovered elements are filled
# with #e1be6a.
girafe_options(
  girafe(
    ggobj = prov_map + latest_vax_graph,
    width_svg = 10,
    height_svg = 5
  ),
  opts_hover(css = "fill:#e1be6a;")
)
# Map combined with the early/latest charts for dose 1, dose 2 and booster.
girafe_options(
  girafe(
    ggobj = prov_map +
      early_vax_graph_dose1 + latest_vax_graph_dose1 +
      early_vax_graph_dose2 + latest_vax_graph_dose2 +
      early_vax_graph_Booster + latest_vax_graph_Booster,
    width_svg = 12,
    height_svg = 8
  ),
  opts_hover(css = "fill:#e1be6a;")
)
## Warning: Removed 1 rows containing missing values (position_stack).
# Build the map + bar-chart widget once, then write it to a single
# self-contained HTML file.
my_widget <- girafe_options(
  girafe(
    ggobj = prov_map + latest_vax_graph,
    width_svg = 10,
    height_svg = 5
  ),
  opts_hover(css = "fill:#e1be6a;")
)
htmlwidgets::saveWidget(
  my_widget,
  "my_widget_page.html",
  selfcontained = TRUE
)
# The six dose charts without the map, drawn through `code = print(...)`
# rather than `ggobj =`.
girafe_options(
  girafe(
    code = print(
      early_vax_graph_dose1 + latest_vax_graph_dose1 +
        early_vax_graph_dose2 + latest_vax_graph_dose2 +
        early_vax_graph_Booster + latest_vax_graph_Booster
    ),
    width_svg = 10,
    height_svg = 5
  ),
  opts_hover(css = "fill:#e1be6a;")
)
## Warning: Removed 1 rows containing missing values (position_stack).
Message shown when the function used select(Province, x, y): "Note: Using an external vector in selections is ambiguous. ℹ Use all_of(x) instead of x to silence this message."
ℹ See https://tidyselect.r-lib.org/reference/faq-external-vector.html
# Select one date's rows for the provinces and keep two chosen columns.
#
# Date: the snapshot date, as a quoted string such as "2021-06-17" or a
#       Date/POSIXt value. If it is missing or not usable as a date (for
#       example an unquoted 2021-06-17, which R evaluates as the number 1998),
#       the most recent date in the data is used, which is what this function
#       always did before.
# x, y: names (character) of the two columns to keep alongside Province.
# Returns the filtered rows of `all_data_can_pop`, excluding `not_provinces`.
dose_TEST <- function(Date, x, y) {
  # `Date` is also a column of `all_data_can_pop`, so inside filter() a bare
  # `Date` always refers to the column and the argument was silently ignored.
  # Capture the argument here, outside the data mask.
  requested_date <- NULL
  if (!missing(Date)) {
    is_date_like <- is.character(Date) || inherits(Date, c("Date", "POSIXt"))
    if (is_date_like && length(Date) == 1L) {
      requested_date <- Date
    } else {
      warning(
        "`Date` must be a single quoted date such as \"2021-06-17\" ",
        "or a Date object; using the most recent date instead.",
        call. = FALSE
      )
    }
  }

  if (is.null(requested_date)) {
    snapshot <- all_data_can_pop %>%
      filter(Date == max(Date), !(Province %in% not_provinces))
  } else {
    # NOTE(review): assumes the Date column can be compared with the value
    # passed in (e.g. a Date column against "YYYY-MM-DD") - confirm the type.
    snapshot <- all_data_can_pop %>%
      filter(Date == .env$requested_date, !(Province %in% not_provinces))
    if (nrow(snapshot) == 0L) {
      warning(
        "No rows found for the requested date; check its format against ",
        "the Date column.",
        call. = FALSE
      )
    }
  }

  snapshot %>%
    select(Province, all_of(x), all_of(y))
}
… instead of having a separate function for each dose, one function takes the columns as arguments. To specify the date and the columns, fill in (date, "col1", "col2").
# The date is quoted: unquoted, 2021-06-17 is the subtraction 2021 - 6 - 17
# (= 1998), not a date.
dose1_TEST <- dose_TEST("2021-06-17", "PercentVaccinatedDose1", "PercentVaccinatedDose1_percent")
# dose1 <- function(Date){
# dose1 <- all_data_can_pop %>%
# filter(Date == max(Date), !(Province %in% not_provinces)) %>%
# select(Province, PercentVaccinatedDose1, PercentVaccinatedDose1_percent)
# }
# e.g. try Date = "2021-06-17" (early dose1 and dose2); "2021-12-20" (for early booster)
# Dates are quoted: unquoted, 2021-06-17 is the subtraction 2021 - 6 - 17
# (= 1998) and 2021-12-20 is 1989, not dates.
dose1_TEST <- dose_TEST("2021-06-17", "PercentVaccinatedDose1", "PercentVaccinatedDose1_percent")
dose2_TEST <- dose_TEST("2021-06-17", "PercentVaccinatedDose2", "PercentVaccinatedDose2_percent")
booster_TEST <- dose_TEST("2021-12-20", "PercentVaccinatedBooster", "PercentVaccinatedBooster_percent")
# one function works same as 3 separate functions
# dose1_data_2021_06_17 <- dose1(2021-06-17)
# dose2_data_2021_06_17 <- dose2(2021-06-17)
# booster_data_2021_12_21 <- booster(2021-12-20)
# Add a tooltip column of the form "<PROVINCE>\n<value>(%)".
#
# data: data frame with a `Province` column.
# x:    vector of values to display, one per row of `data`
#       (passed as data$column_name).
# Returns `data` with an extra character column `tooltip_text_TEST`.
tootip_per_col <- function(data, x) {
  data %>%
    mutate(
      # Unquoted `Province` reads the column. The quoted "Province" used
      # before was a string literal, so every tooltip began with "PROVINCE".
      tooltip_text_TEST = paste0(toupper(Province), "\n",
                                 x, "(%)")
    )
}
Usage: tootip_per_col(data, data$columnname) - pass the values as a vector.
Creates a tooltip-text column (tooltip_text_TEST) in the returned data object, with a % symbol.
# Preview only: the result is printed, not assigned, so
# bar_graph_data_recent_can_dose1 itself is left unchanged.
tootip_per_col(bar_graph_data_recent_can_dose1, bar_graph_data_recent_can_dose1$PercentVaccinatedDose1_percent)
## Province PercentVaccinatedDose1
## 1 ALBERTA 0.8012141
## 2 BRITISH COLUMBIA 0.8556563
## 3 MANITOBA 0.8367441
## 4 NEW BRUNSWICK 0.8780939
## 5 NEWFOUNDLAND AND LABRADOR 0.9549542
## 6 NOVA SCOTIA 0.8839625
## 7 NUNAVUT 0.8401324
## 8 ONTARIO 0.8439078
## 9 QUEBEC 0.8455321
## 10 SASKATCHEWAN 0.8200094
## 11 YUKON 0.8471516
## PercentVaccinatedDose1_percent tooltip_text
## 1 80.12% ALBERTA\n80.12%
## 2 85.57% BRITISH COLUMBIA\n85.57%
## 3 83.67% MANITOBA\n83.67%
## 4 87.81% NEW BRUNSWICK\n87.81%
## 5 95.50% NEWFOUNDLAND AND LABRADOR\n95.50%
## 6 88.40% NOVA SCOTIA\n88.40%
## 7 84.01% NUNAVUT\n84.01%
## 8 84.39% ONTARIO\n84.39%
## 9 84.55% QUEBEC\n84.55%
## 10 82.00% SASKATCHEWAN\n82.00%
## 11 84.72% YUKON\n84.72%
## tooltip_text_TEST
## 1 PROVINCE\n80.12%(%)
## 2 PROVINCE\n85.57%(%)
## 3 PROVINCE\n83.67%(%)
## 4 PROVINCE\n87.81%(%)
## 5 PROVINCE\n95.50%(%)
## 6 PROVINCE\n88.40%(%)
## 7 PROVINCE\n84.01%(%)
## 8 PROVINCE\n84.39%(%)
## 9 PROVINCE\n84.55%(%)
## 10 PROVINCE\n82.00%(%)
## 11 PROVINCE\n84.72%(%)
# Add a tooltip column of the form "<PROVINCE>\n<value>(#)".
#
# data: data frame with a `Province` column.
# x:    vector of values to display, one per row of `data`
#       (passed as data$column_name).
# y:    not used; kept so existing calls keep working.
# Returns `data` with an extra character column `tooltip_text_TEST`.
tootip_num_col <- function(data, x, y) {
  data %>%
    mutate(
      # Unquoted `Province` reads the column. The quoted "Province" used
      # before was a string literal, so every tooltip began with "PROVINCE".
      tooltip_text_TEST = paste0(toupper(Province), "\n",
                                 x, "(#)")
    )
}
Usage: tootip_num_col(data, data$columnname) - pass the values as a vector.
Creates a tooltip-text column (tooltip_text_TEST) in the returned data object, with a # symbol.
# Preview only: the result is printed, not assigned, so
# bar_graph_data_recent_can itself is left unchanged.
tootip_num_col(bar_graph_data_recent_can, bar_graph_data_recent_can$TotalVaccinated)
## Province TotalVaccinated
## 1 NUNAVUT 75394
## 2 YUKON 90642
## 3 PRINCE EDWARD ISLAND 369073
## 4 NEWFOUNDLAND AND LABRADOR 1256652
## 5 NEW BRUNSWICK 1733241
## 6 BRITISH COLUMBIA 11398503
## 7 NOVA SCOTIA 2217196
## 8 SASKATCHEWAN 2380812
## 9 ALBERTA 8528703
## 10 MANITOBA 2847639
## 11 QUEBEC 18505552
## 12 ONTARIO 31882192
## 13 NORTHWEST TERRITORIES 99299
## tooltip_text tooltip_text_TEST
## 1 NUNAVUT\n75,394 (#) PROVINCE\n75394(#)
## 2 YUKON\n90,642 (#) PROVINCE\n90642(#)
## 3 PRINCE EDWARD ISLAND\n369,073 (#) PROVINCE\n369073(#)
## 4 NEWFOUNDLAND AND LABRADOR\n1,256,652 (#) PROVINCE\n1256652(#)
## 5 NEW BRUNSWICK\n1,733,241 (#) PROVINCE\n1733241(#)
## 6 BRITISH COLUMBIA\n11,398,503 (#) PROVINCE\n11398503(#)
## 7 NOVA SCOTIA\n2,217,196 (#) PROVINCE\n2217196(#)
## 8 SASKATCHEWAN\n2,380,812 (#) PROVINCE\n2380812(#)
## 9 ALBERTA\n8,528,703 (#) PROVINCE\n8528703(#)
## 10 MANITOBA\n2,847,639 (#) PROVINCE\n2847639(#)
## 11 QUEBEC\n18,505,552 (#) PROVINCE\n18505552(#)
## 12 ONTARIO\n31,882,192 (#) PROVINCE\n31882192(#)
## 13 NORTHWEST TERRITORIES\n99,299 (#) PROVINCE\n99299(#)
# Interactive bar chart of TotalVaccinated by province, using the test tooltip.
#
# The tooltip column is built here with tootip_num_col() instead of assuming
# `tooltip_text_TEST` is already stored in `bar_graph_data_recent_can`: that
# helper returns a modified copy and does not change the data frame passed in.
latest_vax_graph_TEST <- ggplot(
  tootip_num_col(
    bar_graph_data_recent_can,
    bar_graph_data_recent_can$TotalVaccinated
  ),
  aes(x = reorder(Province, TotalVaccinated),
      y = TotalVaccinated,
      tooltip = tooltip_text_TEST, data_id = Province #<<
  )) +
  geom_col_interactive(color = "black", fill="#0072B2", size = 0.5) + #<<
  theme_minimal() +
  theme(axis.text=element_text(size = 6)) + #<<
  labs(title = "Fully Vaccinated March 23, 2022",
       subtitle = "Data from https://resources-covid19canada.hub.arcgis.com"
  ) +
  ylab("Total Vaccinated") +
  xlab("") +
  scale_y_continuous(labels = comma_format()) +
  coord_flip()
# Render the plot defined above; the original rendered `latest_vax_graph`,
# so the test tooltip was never displayed.
girafe(ggobj = latest_vax_graph_TEST, width_svg = 5, height_svg = 4)
# Add a tooltip column of the form "<PROVINCE>\n<TotalVaccinated>", with the
# count formatted with thousands separators (e.g. 75,394).
#
# data: data frame with `Province` and `TotalVaccinated` columns.
# Returns `data` with an extra character column `tooltip_text_TEST`.
tootip_com_col <- function(data) {
  data %>%
    mutate(
      # Unquoted `Province` reads the column. The quoted "Province" used
      # before was a string literal, so every tooltip began with "PROVINCE".
      tooltip_text_TEST = paste0(toupper(Province), "\n",
                                 scales::comma(TotalVaccinated, accuracy = 1))
    )
}
Usage: tootip_com_col(data) - the data must contain Province and TotalVaccinated columns.
Creates a tooltip-text column (tooltip_text_TEST) in the returned data object, with the number formatted with commas.
# Preview only: the result is printed, not assigned, so
# bar_graph_data_recent_can itself is left unchanged.
tootip_com_col(bar_graph_data_recent_can)
## Province TotalVaccinated
## 1 NUNAVUT 75394
## 2 YUKON 90642
## 3 PRINCE EDWARD ISLAND 369073
## 4 NEWFOUNDLAND AND LABRADOR 1256652
## 5 NEW BRUNSWICK 1733241
## 6 BRITISH COLUMBIA 11398503
## 7 NOVA SCOTIA 2217196
## 8 SASKATCHEWAN 2380812
## 9 ALBERTA 8528703
## 10 MANITOBA 2847639
## 11 QUEBEC 18505552
## 12 ONTARIO 31882192
## 13 NORTHWEST TERRITORIES 99299
## tooltip_text tooltip_text_TEST
## 1 NUNAVUT\n75,394 (#) PROVINCE\n75,394
## 2 YUKON\n90,642 (#) PROVINCE\n90,642
## 3 PRINCE EDWARD ISLAND\n369,073 (#) PROVINCE\n369,073
## 4 NEWFOUNDLAND AND LABRADOR\n1,256,652 (#) PROVINCE\n1,256,652
## 5 NEW BRUNSWICK\n1,733,241 (#) PROVINCE\n1,733,241
## 6 BRITISH COLUMBIA\n11,398,503 (#) PROVINCE\n11,398,503
## 7 NOVA SCOTIA\n2,217,196 (#) PROVINCE\n2,217,196
## 8 SASKATCHEWAN\n2,380,812 (#) PROVINCE\n2,380,812
## 9 ALBERTA\n8,528,703 (#) PROVINCE\n8,528,703
## 10 MANITOBA\n2,847,639 (#) PROVINCE\n2,847,639
## 11 QUEBEC\n18,505,552 (#) PROVINCE\n18,505,552
## 12 ONTARIO\n31,882,192 (#) PROVINCE\n31,882,192
## 13 NORTHWEST TERRITORIES\n99,299 (#) PROVINCE\n99,299
The function below works for passing the data into ggplot through ggplot_fun().
Open question: can the column to plot also be passed in as an argument?
# Horizontal bar chart of PercentVaccinatedDose2 by province, ordered by value.
#
# data: data frame with `Province` and `PercentVaccinatedDose2` columns
#       (the latter is plotted on a 0-1 scale labelled as percentages).
# x:    accepted but not used; the plotted column is fixed below. Kept so
#       existing calls keep working.
# Returns a ggplot object.
ggplot_fun <- function(data, x) {
  ggplot(data,
         aes(x = reorder(Province, PercentVaccinatedDose2),
             y = PercentVaccinatedDose2)) +
    geom_col(color = "black", fill="#0072B2", size = 0.5) +
    theme_minimal() +
    theme(axis.text=element_text(size = 8)) +
    # The title previously read "Dose1" although the chart plots the
    # dose 2 column.
    labs(title = "Dose2 2022-03-23") +
    ylab(" Vaccinated") +
    xlab("") +
    scale_y_continuous(labels = percent_format(), limits=c(0,1)) +
    coord_flip()
}
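Usage: fill in ggplot_fun(data, x). Only the data is used at present; the plotted column (PercentVaccinatedDose2) is fixed inside the function.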
# The second argument is never evaluated by ggplot_fun() (its `x` parameter is
# unused), so the bare column name here has no effect on the plot.
ggplot_fun(bar_graph_data_recent_can_dose2, PercentVaccinatedDose2_percent)